{
 "cells": [
  {
   "cell_type": "code",
   "id": "initial_id",
   "metadata": {
    "collapsed": true,
    "ExecuteTime": {
     "end_time": "2025-10-11T02:50:44.813116Z",
     "start_time": "2025-10-11T02:50:44.416792Z"
    }
   },
   "source": [
    "import pandas as pd\n",
    "from sklearn.tree import DecisionTreeClassifier\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import OneHotEncoder"
   ],
   "outputs": [],
   "execution_count": 4
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-10-11T02:50:31.621395Z",
     "start_time": "2025-10-11T02:50:31.601969Z"
    }
   },
   "cell_type": "code",
   "source": "data = pd.read_csv('../../dataset/西瓜品质数据集1.csv', encoding='gbk')",
   "id": "76d0f3f47da1a3e6",
   "outputs": [],
   "execution_count": 2
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-10-11T02:53:18.481584Z",
     "start_time": "2025-10-11T02:53:18.464804Z"
    }
   },
   "cell_type": "code",
   "source": "data",
   "id": "795a873b09f9e8b0",
   "outputs": [
    {
     "data": {
      "text/plain": [
       "    编号  色泽  根蒂  敲声  纹理  脐部  触感     密度    含糖率 好瓜\n",
       "0    1  青绿  蜷缩  浊响  清晰  凹陷  硬滑  0.697  0.460  是\n",
       "1    2  乌黑  蜷缩  沉闷  清晰  凹陷  硬滑  0.774  0.376  是\n",
       "2    3  乌黑  蜷缩  浊响  清晰  凹陷  硬滑  0.634  0.264  是\n",
       "3    4  青绿  蜷缩  沉闷  清晰  凹陷  硬滑  0.608  0.318  是\n",
       "4    5  浅白  蜷缩  浊响  清晰  凹陷  硬滑  0.556  0.215  是\n",
       "5    6  青绿  稍蜷  浊响  清晰  稍凹  软粘  0.403  0.237  是\n",
       "6    7  乌黑  稍蜷  浊响  稍糊  稍凹  软粘  0.481  0.149  是\n",
       "7    8  乌黑  稍蜷  浊响  清晰  稍凹  硬滑  0.437  0.211  是\n",
       "8    9  乌黑  稍蜷  沉闷  稍糊  稍凹  硬滑  0.666  0.091  否\n",
       "9   10  青绿  硬挺  清脆  清晰  平坦  软粘  0.243  0.267  否\n",
       "10  11  浅白  硬挺  清脆  模糊  平坦  硬滑  0.245  0.057  否\n",
       "11  12  浅白  蜷缩  浊响  平坦  平坦  软粘  0.343  0.099  否\n",
       "12  13  青绿  稍蜷  浊响  稍糊  凹陷  硬滑  0.639  0.161  否\n",
       "13  14  浅白  稍蜷  沉闷  稍糊  凹陷  硬滑  0.657  0.198  否\n",
       "14  15  乌黑  稍蜷  浊响  清晰  稍凹  软粘  0.360  0.370  否\n",
       "15  16  浅白  蜷缩  浊响  模糊  平坦  硬滑  0.593  0.042  否\n",
       "16  17  青绿  蜷缩  沉闷  稍糊  稍凹  硬滑  0.719  0.103  否"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>编号</th>\n",
       "      <th>色泽</th>\n",
       "      <th>根蒂</th>\n",
       "      <th>敲声</th>\n",
       "      <th>纹理</th>\n",
       "      <th>脐部</th>\n",
       "      <th>触感</th>\n",
       "      <th>密度</th>\n",
       "      <th>含糖率</th>\n",
       "      <th>好瓜</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>青绿</td>\n",
       "      <td>蜷缩</td>\n",
       "      <td>浊响</td>\n",
       "      <td>清晰</td>\n",
       "      <td>凹陷</td>\n",
       "      <td>硬滑</td>\n",
       "      <td>0.697</td>\n",
       "      <td>0.460</td>\n",
       "      <td>是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>乌黑</td>\n",
       "      <td>蜷缩</td>\n",
       "      <td>沉闷</td>\n",
       "      <td>清晰</td>\n",
       "      <td>凹陷</td>\n",
       "      <td>硬滑</td>\n",
       "      <td>0.774</td>\n",
       "      <td>0.376</td>\n",
       "      <td>是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>乌黑</td>\n",
       "      <td>蜷缩</td>\n",
       "      <td>浊响</td>\n",
       "      <td>清晰</td>\n",
       "      <td>凹陷</td>\n",
       "      <td>硬滑</td>\n",
       "      <td>0.634</td>\n",
       "      <td>0.264</td>\n",
       "      <td>是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>青绿</td>\n",
       "      <td>蜷缩</td>\n",
       "      <td>沉闷</td>\n",
       "      <td>清晰</td>\n",
       "      <td>凹陷</td>\n",
       "      <td>硬滑</td>\n",
       "      <td>0.608</td>\n",
       "      <td>0.318</td>\n",
       "      <td>是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>浅白</td>\n",
       "      <td>蜷缩</td>\n",
       "      <td>浊响</td>\n",
       "      <td>清晰</td>\n",
       "      <td>凹陷</td>\n",
       "      <td>硬滑</td>\n",
       "      <td>0.556</td>\n",
       "      <td>0.215</td>\n",
       "      <td>是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>青绿</td>\n",
       "      <td>稍蜷</td>\n",
       "      <td>浊响</td>\n",
       "      <td>清晰</td>\n",
       "      <td>稍凹</td>\n",
       "      <td>软粘</td>\n",
       "      <td>0.403</td>\n",
       "      <td>0.237</td>\n",
       "      <td>是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>7</td>\n",
       "      <td>乌黑</td>\n",
       "      <td>稍蜷</td>\n",
       "      <td>浊响</td>\n",
       "      <td>稍糊</td>\n",
       "      <td>稍凹</td>\n",
       "      <td>软粘</td>\n",
       "      <td>0.481</td>\n",
       "      <td>0.149</td>\n",
       "      <td>是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>8</td>\n",
       "      <td>乌黑</td>\n",
       "      <td>稍蜷</td>\n",
       "      <td>浊响</td>\n",
       "      <td>清晰</td>\n",
       "      <td>稍凹</td>\n",
       "      <td>硬滑</td>\n",
       "      <td>0.437</td>\n",
       "      <td>0.211</td>\n",
       "      <td>是</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>9</td>\n",
       "      <td>乌黑</td>\n",
       "      <td>稍蜷</td>\n",
       "      <td>沉闷</td>\n",
       "      <td>稍糊</td>\n",
       "      <td>稍凹</td>\n",
       "      <td>硬滑</td>\n",
       "      <td>0.666</td>\n",
       "      <td>0.091</td>\n",
       "      <td>否</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>10</td>\n",
       "      <td>青绿</td>\n",
       "      <td>硬挺</td>\n",
       "      <td>清脆</td>\n",
       "      <td>清晰</td>\n",
       "      <td>平坦</td>\n",
       "      <td>软粘</td>\n",
       "      <td>0.243</td>\n",
       "      <td>0.267</td>\n",
       "      <td>否</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>11</td>\n",
       "      <td>浅白</td>\n",
       "      <td>硬挺</td>\n",
       "      <td>清脆</td>\n",
       "      <td>模糊</td>\n",
       "      <td>平坦</td>\n",
       "      <td>硬滑</td>\n",
       "      <td>0.245</td>\n",
       "      <td>0.057</td>\n",
       "      <td>否</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>12</td>\n",
       "      <td>浅白</td>\n",
       "      <td>蜷缩</td>\n",
       "      <td>浊响</td>\n",
       "      <td>平坦</td>\n",
       "      <td>平坦</td>\n",
       "      <td>软粘</td>\n",
       "      <td>0.343</td>\n",
       "      <td>0.099</td>\n",
       "      <td>否</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>13</td>\n",
       "      <td>青绿</td>\n",
       "      <td>稍蜷</td>\n",
       "      <td>浊响</td>\n",
       "      <td>稍糊</td>\n",
       "      <td>凹陷</td>\n",
       "      <td>硬滑</td>\n",
       "      <td>0.639</td>\n",
       "      <td>0.161</td>\n",
       "      <td>否</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>14</td>\n",
       "      <td>浅白</td>\n",
       "      <td>稍蜷</td>\n",
       "      <td>沉闷</td>\n",
       "      <td>稍糊</td>\n",
       "      <td>凹陷</td>\n",
       "      <td>硬滑</td>\n",
       "      <td>0.657</td>\n",
       "      <td>0.198</td>\n",
       "      <td>否</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>15</td>\n",
       "      <td>乌黑</td>\n",
       "      <td>稍蜷</td>\n",
       "      <td>浊响</td>\n",
       "      <td>清晰</td>\n",
       "      <td>稍凹</td>\n",
       "      <td>软粘</td>\n",
       "      <td>0.360</td>\n",
       "      <td>0.370</td>\n",
       "      <td>否</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>16</td>\n",
       "      <td>浅白</td>\n",
       "      <td>蜷缩</td>\n",
       "      <td>浊响</td>\n",
       "      <td>模糊</td>\n",
       "      <td>平坦</td>\n",
       "      <td>硬滑</td>\n",
       "      <td>0.593</td>\n",
       "      <td>0.042</td>\n",
       "      <td>否</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>17</td>\n",
       "      <td>青绿</td>\n",
       "      <td>蜷缩</td>\n",
       "      <td>沉闷</td>\n",
       "      <td>稍糊</td>\n",
       "      <td>稍凹</td>\n",
       "      <td>硬滑</td>\n",
       "      <td>0.719</td>\n",
       "      <td>0.103</td>\n",
       "      <td>否</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 6
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-10-11T02:53:31.043608Z",
     "start_time": "2025-10-11T02:53:31.024151Z"
    }
   },
   "cell_type": "code",
   "source": "data.columns",
   "id": "d2ec01c87df9b297",
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['编号', '色泽', '根蒂', '敲声', '纹理', '脐部', '触感', '密度', '含糖率', '好瓜'], dtype='object')"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 7
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-10-11T02:57:54.675701Z",
     "start_time": "2025-10-11T02:57:54.653559Z"
    }
   },
   "cell_type": "code",
   "source": [
    "X = data[['色泽', '根蒂', '敲声', '纹理', '脐部', '触感', '密度', '含糖率']].values\n",
    "y = data['好瓜'].values\n",
    "encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)\n",
    "encoded_data = encoder.fit_transform(X)\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    encoded_data, y, test_size=0.3, random_state=1\n",
    ")\n",
    "model = DecisionTreeClassifier()\n",
    "model.fit(X_train, y_train)\n",
    "new_data = [['浅白', '稍蜷', '沉闷', '清晰', '凹陷', '硬滑', 0.447, 0.460]]\n",
    "new_encoded_data = encoder.transform(new_data)\n",
    "prediction = model.predict(new_encoded_data)\n",
    "print(prediction)"
   ],
   "id": "448834b69fa04c10",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['否']\n"
     ]
    }
   ],
   "execution_count": 13
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "",
   "id": "d5e18d57a47803af"
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
